Letter abbreviations:
- <int> stands for integers.
- <dbl> stands for real numbers.
- <chr> stands for character vectors.
- <dttm> stands for date-times.
# Open the flights data set from the nycflights13 package in the data viewer.
View(nycflights13::flights)
1a.
# Keep flights whose arr_delay is at least 120 (two hours, since
# nycflights13 documents delays in minutes).
flights %>% filter(arr_delay >= 120)
1b.
# Keep flights whose destination code is IAH or HOU.
flights %>% filter(dest %in% c("IAH", "HOU"))
1c.
# Keep flights whose carrier code is AA, DL or UA.
flights %>% filter(carrier %in% c("AA", "DL", "UA"))
1d.
# Keep flights from months 7, 8 and 9.
flights %>% filter(month %in% 7:9)
1e.
# Keep flights with arr_delay above 120 whose dep_delay is zero or negative,
# i.e. they arrived very late without having left late.
flights %>% filter(arr_delay > 120, dep_delay <= 0)
1f.
# Keep flights with dep_delay of at least 60 whose arr_delay is more than 30
# below their dep_delay, i.e. they made up over 30 of the delay en route.
flights %>% filter(dep_delay >= 60, dep_delay - arr_delay > 30)
1g.
# Keep flights whose dep_time is at most 600 or at least 2400.
# Rows with a missing dep_time are dropped by filter().
flights %>% filter(dep_time <= 600 | dep_time >= 2400)
# between(x, a, b) is a shortcut for a <= x & x <= b (both ends inclusive).
# 1g keeps dep_time <= 600 or dep_time >= 2400, which is everything outside
# 601-2359, so the range test has to be negated. between(dep_time, 600, 2400)
# would instead keep the flights departing from 600 through 2400.
filter(flights, !between(dep_time, 601, 2359))
# Count the flights whose dep_time is missing.
sum(is.na(flights[["dep_time"]]))
[1] 8255
# Count the missing values in every column of flights (one number per column).
map_dbl(flights, function(column) sum(is.na(column)))
year month
0 0
day dep_time
0 8255
sched_dep_time dep_delay
0 8255
arr_time sched_arr_time
8713 0
arr_delay carrier
9430 0
flight tailnum
0 2512
origin dest
0 0
air_time distance
9430 0
hour minute
0 0
time_hour
0
dep_time, dep_delay, arr_time, arr_delay, tailnum and air_time have missing values.
# Ascending dep_delay: the smallest departure delays come first.
flights %>% arrange(dep_delay)
# Descending dep_delay: the largest departure delays come first.
flights %>% arrange(desc(dep_delay))
# Ascending air_time: the shortest times in the air come first.
flights %>% arrange(air_time)
# Shortest: flight and air_time columns only, smallest air_time first.
flights %>%
  select(flight, air_time) %>%
  arrange(air_time)
# Longest
# Sorting by descending air_time puts the flights that spent the most time in
# the air first; that is the longest flights, not the fastest ones. desc() is
# used so the descending sort reads the same as the one on dep_delay.
flights %>%
  arrange(desc(air_time)) %>%
  select(flight, air_time)
# Pick the four departure/arrival columns by naming each one.
flights %>% select(dep_time, dep_delay, arr_time, arr_delay)
# Pick columns by name prefix: those starting with "dep", then those
# starting with "arr".
flights %>% select(starts_with("dep"), starts_with("arr"))
# Pick the same four columns by giving their names as strings.
# select_() is one of the deprecated underscore verbs; select() itself
# accepts quoted column names, so the strings are passed to it directly.
select(flights, "dep_time", "dep_delay", "arr_time", "arr_delay")
# Name the same column twice in a single select() call.
flights %>% select(arr_delay, arr_delay)
Nothing special happens: the column is returned only once, even though it is named twice.
# Column names held in a character vector; one_of() selects the columns
# whose names appear in that vector.
vars <- c("year", "month", "day", "dep_delay", "arr_delay")
flights %>% select(one_of(vars))
Unsure why helpful.
# contains() ignores case by default; the default is spelled out here, so
# "TIME" also matches lower-case column names. Pass ignore.case = FALSE for a
# case-sensitive match.
select(flights, contains("TIME", ignore.case = TRUE))
Unsure.